##
## BWC Meta-analysis
## R Code to Read Data from MySQL server
## and run analyses
## Created by D.B.Wilson
## Lasted Edited June 5, 2020
##

##------------------------------------------------------------------
##
## Load libraries
##
##------------------------------------------------------------------
library("tidyverse")
library("dplyr")
library("knitr")
library("kableExtra")
library("metafor")
library("janitor")
library("data.table")
library("tools")
library("stringr")
library("lubridate")
library("svglite")
library("ggplot2")
library("ggthemes")
options(width=200,scipen=999)

##------------------------------------------------------------------
##
## Read CSV files
##
##------------------------------------------------------------------

setwd("C:/Users/awjor/Dropbox/misconduct/Replication")

data<- read.csv("data/effects_time.csv", header=TRUE, stringsAsFactors=FALSE)
library(ggplot2)
# Define custom arrowhead shape with spike-like ends
spike_arrow <- arrow(type = "open", length = unit(0.2, "inches"), ends = "last")
spike_arrow2 <- arrow(type = "open", length = unit(0.2, "inches"), ends = "both")

# Define y-axis limits
y_lower_limit <- -1
y_upper_limit <- 1

# Identify data points where CI limits extend beyond y-axis limits
data$exceed_lower <- data$min95_complaints_postc1_3 < y_lower_limit & data$max95_complaints_postc1_3 <= y_upper_limit
data$exceed_both <- data$min95_complaints_postc1_3 < y_lower_limit & data$max95_complaints_postc1_3 > y_upper_limit
data$exceed_upper_only <- data$min95_complaints_postc1_3 >= y_lower_limit & data$max95_complaints_postc1_3 > y_upper_limit

# Cap the error bars at y-axis limits
data$min95_complaints_postc1_3_capped <- pmax(data$min95_complaints_postc1_3, y_lower_limit)
data$max95_complaints_postc1_3_capped <- pmin(data$max95_complaints_postc1_3, y_upper_limit)

# Define breaks and labels for time
breaks <- c(1, 2, 3, 4)
#labels <- c("2006,9-2007,8", "2007,9 - 2008,7", "2014,9 - 2015,10", "2015,11 - 2016,10", "2016,11 - 2017,10", "2017,11 - 2018,11")
labels <- c("2006,9-2008,7",  "2014,9 - 2015,10", "2015,11 - 2017,8",  "2017,8 - 2018,11")


# Plot for betacomplaints_postc1_3
plot_complaints <- ggplot(data, aes(x = factor(time, levels = breaks, labels = labels), y = betacomplaints_postc1_3)) +
  geom_line(color = "blue", size = 1.3, group = 1) +
  geom_errorbar(aes(ymin = min95_complaints_postc1_3, ymax = max95_complaints_postc1_3),
                width = 0.2, color = "blue") +
  geom_segment(data = subset(data, exceed_lower), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = max95_complaints_postc1_3_capped, yend = y_lower_limit),
               color = "blue", size = 0.5, arrow = spike_arrow) +
  geom_segment(data = subset(data, exceed_both), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = y_lower_limit, yend = y_upper_limit),
               color = "blue", size = 0.5, arrow = spike_arrow2) +
  geom_segment(data = subset(data, exceed_upper_only), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = min95_complaints_postc1_3_capped, yend = y_upper_limit),
               color = "blue", size = 0.5, arrow = spike_arrow) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +  # Add the null effects line
  labs(x = "Time Period", y = "Effect Sizes on Complaints") +
  theme_minimal() +
  scale_y_continuous(limits = c(y_lower_limit, y_upper_limit))


print(plot_complaints)
ggsave(filename = "output/figures/complaints_overtime2.pdf", plot =plot_complaints,width = 10, height = 6)

# Define y-axis limits for arrests
y_lower_limit <- -20
y_upper_limit <- 45


# Identify data points where CI limits extend beyond y-axis limits
data$exceed_lower <- data$min95_arrests_postc1_3 < y_lower_limit & data$max95_arrests_postc1_3 <= y_upper_limit
data$exceed_both <- data$min95_arrests_postc1_3 < y_lower_limit & data$max95_arrests_postc1_3 > y_upper_limit
data$exceed_upper_only <- data$min95_arrests_postc1_3 >= y_lower_limit & data$max95_arrests_postc1_3 > y_upper_limit

# Cap the error bars at y-axis limits
data$min95_arrests_postc1_3_capped <- pmax(data$min95_arrests_postc1_3, y_lower_limit)
data$max95_arrests_postc1_3_capped <- pmin(data$max95_arrests_postc1_3, y_upper_limit)

# Define breaks and labels for time
 
# Plot for betaarrests_postc1_3
plot_arrests <- ggplot(data, aes(x = factor(time, levels = breaks, labels = labels), y = betaarrests_postc1_3)) +
  geom_line(color = "blue", size = 1.3, group = 1) +
  geom_errorbar(aes(ymin = min95_arrests_postc1_3, ymax = max95_arrests_postc1_3),
                width = 0.2, color = "blue") +
  geom_segment(data = subset(data, exceed_lower), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = max95_arrests_postc1_3_capped, yend = y_lower_limit),
               color = "blue", size = 0.5, arrow = spike_arrow) +
  geom_segment(data = subset(data, exceed_both), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = y_lower_limit, yend = y_upper_limit),
               color = "blue", size = 0.5, arrow = spike_arrow2) +
  geom_segment(data = subset(data, exceed_upper_only), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = min95_arrests_postc1_3_capped, yend = y_upper_limit),
               color = "blue", size = 0.5, arrow = spike_arrow) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +  # Add the null effects line
  labs(x = "Time Period", y = "Effect Sizes on Arrests") +
  theme_minimal() +
  scale_y_continuous(limits = c(y_lower_limit, y_upper_limit))


print(plot_arrests)
ggsave(filename = "output/figures/arrests_overtime2.pdf", plot =plot_arrests,width = 10, height = 6)


# Define y-axis limits
y_lower_limit <- -5
y_upper_limit <- 5

# Identify data points where CI limits extend beyond y-axis limits
data$exceed_lower <- data$min95_type1_arrests_postc1_3 < y_lower_limit & data$max95_type1_arrests_postc1_3 <= y_upper_limit
data$exceed_both <- data$min95_type1_arrests_postc1_3 < y_lower_limit & data$max95_type1_arrests_postc1_3 > y_upper_limit
data$exceed_upper_only <- data$min95_type1_arrests_postc1_3 >= y_lower_limit & data$max95_type1_arrests_postc1_3 > y_upper_limit

# Cap the error bars at y-axis limits
data$min95_type1_arrests_postc1_3_capped <- pmax(data$min95_type1_arrests_postc1_3, y_lower_limit)
data$max95_type1_arrests_postc1_3_capped <- pmin(data$max95_type1_arrests_postc1_3, y_upper_limit)

 
# Plot for betatype1_arrests_postc1_3
plot_type1_arrests <- ggplot(data, aes(x = factor(time, levels = breaks, labels = labels), y = betatype1_arrests_postc1_3)) +
  geom_line(color = "blue", size = 1.3, group = 1) +
  geom_errorbar(aes(ymin = min95_type1_arrests_postc1_3, ymax = max95_type1_arrests_postc1_3),
                width = 0.2, color = "blue") +
  geom_segment(data = subset(data, exceed_lower), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = max95_type1_arrests_postc1_3_capped, yend = y_lower_limit),
               color = "blue", size = 0.5, arrow = spike_arrow) +
  geom_segment(data = subset(data, exceed_both), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = y_lower_limit, yend = y_upper_limit),
               color = "blue", size = 0.5, arrow = spike_arrow2) +
  geom_segment(data = subset(data, exceed_upper_only), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = min95_type1_arrests_postc1_3_capped, yend = y_upper_limit),
               color = "blue", size = 0.5, arrow = spike_arrow) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +  # Add the null effects line
  labs(x = "Time Period", y = "Effect Sizes on Type 1 Arrests") +
  theme_minimal() +
  scale_y_continuous(limits = c(y_lower_limit, y_upper_limit))

# Print the plot
 
print(plot_type1_arrests)




# Define y-axis limits
y_lower_limit <- -25
y_upper_limit <- 50

# Identify data points where CI limits extend beyond y-axis limits
data$exceed_lower <- data$min95_nonindex_arrests_postc1_3 < y_lower_limit & data$max95_nonindex_arrests_postc1_3 <= y_upper_limit
data$exceed_both <- data$min95_nonindex_arrests_postc1_3 < y_lower_limit & data$max95_nonindex_arrests_postc1_3 > y_upper_limit
data$exceed_upper_only <- data$min95_nonindex_arrests_postc1_3 >= y_lower_limit & data$max95_nonindex_arrests_postc1_3 > y_upper_limit

# Cap the error bars at y-axis limits
data$min95_nonindex_arrests_postc1_3_capped <- pmax(data$min95_nonindex_arrests_postc1_3, y_lower_limit)
data$max95_nonindex_arrests_postc1_3_capped <- pmin(data$max95_nonindex_arrests_postc1_3, y_upper_limit)


# Plot for betanonindex_arrests_postc1_3
plot_nonindex_arrests <- ggplot(data, aes(x = factor(time, levels = breaks, labels = labels), y = betanonindex_arrests_postc1_3)) +
  geom_line(color = "blue", size = 1.3, group = 1) +
  geom_errorbar(aes(ymin = min95_nonindex_arrests_postc1_3, ymax = max95_nonindex_arrests_postc1_3),
                width = 0.2, color = "blue") +
  geom_segment(data = subset(data, exceed_lower), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = max95_nonindex_arrests_postc1_3_capped, yend = y_lower_limit),
               color = "blue", size = 0.5, arrow = spike_arrow) +
  geom_segment(data = subset(data, exceed_both), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = y_lower_limit, yend = y_upper_limit),
               color = "blue", size = 0.5, arrow = spike_arrow2) +
  geom_segment(data = subset(data, exceed_upper_only), aes(x = factor(time, levels = breaks, labels = labels), xend = factor(time, levels = breaks, labels = labels), y = min95_nonindex_arrests_postc1_3_capped, yend = y_upper_limit),
               color = "blue", size = 0.5, arrow = spike_arrow) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +  # Add the null effects line
  labs(x = "Time Period", y = "Effect Sizes on Non-index Arrests") +
  theme_minimal() +
  scale_y_continuous(limits = c(y_lower_limit, y_upper_limit))

# Print the plot

print(plot_nonindex_arrests)
 
# Save the plot to a PDF file
ggsave(filename = "output/figures/nonindex_arrests_overtime2.pdf", plot =plot_nonindex_arrests,width = 10, height = 6)








# Combine all figures into one figure

data<- read.csv("data/effects_time.csv", header=TRUE, stringsAsFactors=FALSE)
library(ggplot2)
# Define custom arrowhead shape with spike-like ends
spike_arrow <- arrow(type = "open", length = unit(0.2, "inches"), ends = "last")
spike_arrow2 <- arrow(type = "open", length = unit(0.2, "inches"), ends = "both")

# Define y-axis limits
y_lower_limit <- -1
y_upper_limit <- 1

# Identify data points where CI limits extend beyond y-axis limits
data$exceed_lower <- data$min95_complaints_postc1_3 < y_lower_limit & data$max95_complaints_postc1_3 <= y_upper_limit
data$exceed_both <- data$min95_complaints_postc1_3 < y_lower_limit & data$max95_complaints_postc1_3 > y_upper_limit
data$exceed_upper_only <- data$min95_complaints_postc1_3 >= y_lower_limit & data$max95_complaints_postc1_3 > y_upper_limit

# Cap the error bars at y-axis limits
data$min95_complaints_postc1_3_capped <- pmax(data$min95_complaints_postc1_3, y_lower_limit)
data$max95_complaints_postc1_3_capped <- pmin(data$max95_complaints_postc1_3, y_upper_limit)

# Define breaks and labels for time
breaks <- c(1, 2, 3, 4)
#labels <- c("2006,9-2007,8", "2007,9 - 2008,7", "2014,9 - 2015,10", "2015,11 - 2016,10", "2016,11 - 2017,10", "2017,11 - 2018,11")
labels <- c("2006,9-2008,7",  "2014,9 - 2015,10", "2015,11 - 2017,8",  "2017,8 - 2018,11")


# Rename beta columns to have an underscore after beta
data <- data %>%
  rename(
    beta_complaints_postc1_3 = betacomplaints_postc1_3,
    beta_arrests_postc1_3 = betaarrests_postc1_3,
    beta_type1_arrests_postc1_3 = betatype1_arrests_postc1_3,
    beta_nonindex_arrests_postc1_3 = betanonindex_arrests_postc1_3
  )

# Reshape data into long format and drop rows with NA in measure
data_long <- data %>%
  select(time, 
         beta_complaints_postc1_3, min95_complaints_postc1_3, max95_complaints_postc1_3,
         beta_arrests_postc1_3, min95_arrests_postc1_3, max95_arrests_postc1_3,
         beta_type1_arrests_postc1_3, min95_type1_arrests_postc1_3, max95_type1_arrests_postc1_3,
         beta_nonindex_arrests_postc1_3, min95_nonindex_arrests_postc1_3, max95_nonindex_arrests_postc1_3) %>%
  pivot_longer(
    cols = -time,
    names_to = c(".value", "measure"),
    names_pattern = "(beta|min95|max95)_(.*)"
  ) %>%
  mutate(measure = factor(measure, levels = c("complaints_postc1_3", "arrests_postc1_3", "type1_arrests_postc1_3", "nonindex_arrests_postc1_3"),
                          labels = c("Complaints", "Arrests", "Type 1 Arrests", "Non-index Arrests"))) %>%
  drop_na(measure)

# Check the structure of the reshaped data
str(data_long)

# Define y-axis limits for each measure
y_limits <- list(
  "Complaints" = c(-1, 1),
  "Arrests" = c(-20, 45),
  "Type 1 Arrests" = c(-5, 5),
  "Non-index Arrests" = c(-25, 50)
)

# Plot the bar charts with confidence intervals
plot_combined <- ggplot(data_long, aes(x = factor(time, levels = breaks, labels = labels), y = beta, fill = measure)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7, fill = "#808000") +
  geom_errorbar(aes(ymin = min95, ymax = max95), position = position_dodge(0.7), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  facet_wrap(~ measure, scales = "free_y") +
  labs(x = "Time Period", y = "Effect Size", fill = "Measure") +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10, face = "bold"),
    legend.position = "none",
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_line(color = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm")
  )

# Print the combined plot
print(plot_combined)

ggsave(filename = "output/figures/overtime_all.pdf", plot =plot_combined,width = 10, height = 6)

# Plot the bar charts with confidence intervals, stacked vertically
plot_combined <- ggplot(data_long, aes(x = factor(time, levels = breaks, labels = labels), y = beta, fill = measure)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7, fill = "#808000") +
  geom_errorbar(aes(ymin = min95, ymax = max95), position = position_dodge(0.7), width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  facet_wrap(~ measure, ncol = 1, scales = "free_y") +  # Stack plots vertically
  labs(x = "Time Period", y = "Effect Size", fill = "Measure") +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 14),  # Larger font size for variable names
    axis.text.x = element_text(size = 14, angle = 45, hjust = 1),  # Larger font size for time period labels
    axis.title.x = element_text(size = 16, face = "bold"),  # Larger font size for x-axis label
    axis.title.y = element_text(size = 16, face = "bold"),  # Larger font size for y-axis label
    legend.position = "bottom",
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_line(color = "gray"),
    panel.grid.minor.x = element_blank(),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm")
  )

# Print the combined plot
print(plot_combined)
ggsave(filename = "output/figures/overtime_all_2.pdf", plot =plot_combined, width = 12, height = 12)

 